Vertical 2: Inequity and Disparity

Work Notebook

Notebook Setup

library(knitr)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(dplyr)
library(readxl)
library(skimr)
library(moderndive)

###Data Manipulations

##Import merged data across data sets 3 and 4

joined_data <- read_excel("C:/Users/User/Downloads/joined_data.xlsx")
View(joined_data)

##Created Age Proportions for Counties

age_proportions <- joined_data %>% rename('pop_fmless5' = 'PopFmle<52010',  
                                          'pop_mless5' = 'PopMale<52010', 
                                          'pop_fm5to9' = 'PopFmle5-92010',  
                                          'pop_m5to9' = 'PopMale5-92010',   
                                          'pop_m10to14' = 'PopMale10-142010',   
                                          'pop_fm10to14' = 'PopFmle10-142010', 
                                          'pop_m15to19' = 'PopMale15-192010', 
                                          'pop_fm15to19' = 'PopFmle15-192010',  
                                          'pop_m20to24' = 'PopMale20-242010', 
                                          'pop_fm20to24' = 'PopFmle20-242010',
                                          'pop_m25to29' = 'PopMale25-292010',   
                                          'pop_fm25to29' = 'PopFmle25-292010', 
                                          'pop_m30to34' = 'PopMale30-342010',       
                                          'pop_fm30to34' = 'PopFmle30-342010',  
                                          'pop_m35to44' = 'PopMale35-442010',   
                                          'pop_fm35to44' = 'PopFmle35-442010',  
                                          'pop_m45to54' = 'PopMale45-542010',   
                                          'pop_fm45to54' = 'PopFmle45-542010',  
                                          'pop_m55to59' = 'PopMale55-592010',   
                                          'pop_fm55to59' = 'PopFmle55-592010',  
                                          'pop_m60to64' = 'PopMale60-642010',   
                                          'pop_fm60to64' = 'PopFmle60-642010', 
                                          'pop_m65to74' = 'PopMale65-742010',   
                                          'pop_fm65to74' = 'PopFmle65-742010',  
                                          'pop_m75to84' = 'PopMale75-842010',   
                                          'pop_fm75to84' = 'PopFmle75-842010',  
                                          'pop_mover84' = 'PopMale>842010', 
                                          'pop_fmover84' = 'PopFmle>842010') %>%
  mutate(proportion_fmless_5 = pop_fmless5 / CensusPopulation2010,
         proportion_mless_5 = pop_mless5 / CensusPopulation2010, 
         proportion_fm5to9 = pop_fm5to9 / CensusPopulation2010,
         proportion_m5to9 = pop_m5to9 / CensusPopulation2010,
         proportion_m10to14 = pop_m10to14 / CensusPopulation2010,
         proportion_fm10to14 = pop_fm10to14 / CensusPopulation2010,
         proportion_m15to19 = pop_m15to19 / CensusPopulation2010,
         proportion_fm15to19 = pop_fm15to19 / CensusPopulation2010,
         proportion_m20to24 = pop_m20to24 / CensusPopulation2010,
         proportion_fm20to24 = pop_fm20to24 / CensusPopulation2010,
         proportion_m25to29 = pop_m25to29 / CensusPopulation2010,
         proportion_fm25to29 = pop_m25to29 / CensusPopulation2010,
         proportion_m30to34 = pop_m30to34 / CensusPopulation2010,
         proportion_fm30to34 = pop_fm30to34 / CensusPopulation2010,
         proportion_m35to44 = pop_m35to44 / CensusPopulation2010,
         proportion_fm35to44 = pop_m35to44 / CensusPopulation2010,
         proportion_m45to54 = pop_m45to54 / CensusPopulation2010,
         proportion_fm45to54 = pop_fm45to54 / CensusPopulation2010,
         proportion_m55to59 = pop_m55to59 / CensusPopulation2010,
         proportion_fm55to59 = pop_fm55to59 / CensusPopulation2010,
         proportion_m60to64 = pop_m60to64 / CensusPopulation2010,
         proportion_fm60to64 = pop_fm60to64 / CensusPopulation2010,
         proportion_m65to74 = pop_m65to74 / CensusPopulation2010,
         proportion_fm65to74 = pop_fm65to74 / CensusPopulation2010,
         proportion_m75to84 = pop_m75to84 / CensusPopulation2010,
         proportion_fm75to84 = pop_fm75to84 / CensusPopulation2010,
         proportion_mover84 = pop_mover84 / CensusPopulation2010,
         proportion_fmover84 = pop_fmover84 / CensusPopulation2010,
         ) 
ages_over_60 <- age_proportions %>% mutate(proportion_fmover60 = proportion_fm60to64 + proportion_fm65to74 + proportion_fm75to84 + proportion_fm75to84, proportion_mover60 = proportion_m60to64 + proportion_m65to74 + proportion_m75to84 + proportion_m75to84)
View(age_proportions)
write.csv(age_proportions,"C:\\Users\\User\\Desktop\\age_proportions.csv", row.names = FALSE)
write.csv(ages_over_60, "C:\\Users\\User\\Desktop\\ages_over_60.csv", row.names = FALSE)

##Created COVID Death Rate

death_rate<- age_proportions %>% mutate(covid_mortality_rate = tot_deaths / tot_cases)
View(death_rate)
write.csv(death_rate,"C:\\Users\\User\\Desktop\\death_rate.csv", row.names = FALSE)

death_rate_over_60 <- ages_over_60 %>% mutate(covid_mortality_rate = tot_deaths / tot_cases)
View(death_rate_over_60)
write.csv(death_rate_over_60,"C:\\Users\\User\\Desktop\\death_rate_over_60.csv", row.names = FALSE)

##Created Race columns

race_populations <- death_rate %>% mutate(pop_white = RACE_White*Population, pop_black = RACE_Black*Population, pop_amer_indian_alaska_native = `RACE_American_Indian_ Alaska_Native`*Population, pop_asian = RACE_Asian*Population, pop_native_hawaiian_pacific_islander = RACE_Native_Hawaiian_Other_Pacific_Islander*Population, pop_other = `RACE_ Other`*Population)
#glimpse(race_populations)
write.csv(race_populations,"C:\\Users\\User\\Desktop\\race_populations.csv", row.names = FALSE)

races <- race_populations %>% select(state = REGION, population = Population, white = pop_white, black = pop_black, native = pop_amer_indian_alaska_native, asian = pop_asian, pacific_islander = pop_native_hawaiian_pacific_islander, other = pop_other)

##Spread of Covid Mortality Rate by State

States with the largest spread across counties include New York and Michigan. Other states with high death rates are all near New York, including Connecticut, Maryland, New Jersey, and Pennsylvania.

ggplot(data = death_rate, aes(x=REGION, y=covid_mortality_rate,)) + 
  geom_boxplot() + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

##Regression Analysis: Covid Mortality Rate by State

Taking the previous graph and applying regression analysis, there appears to be Several states with significant Covid Mortality Rates. This led us to review a selection of these states specifically. R^2 is about .80 which tells us our model for these states is a good fit, as it explains 80% of the variance.

MI p 0.010 < 0.05 NY, Connecticut, Massachusetts, New Jersey, 0.00 < 0.05 Penn p 0.006 < 0.05

model_death_regions <- lm(covid_mortality_rate ~ REGION, data = death_rate)
get_regression_table(model_death_regions)
get_regression_summaries(model_death_regions)
ggplot(death_rate, aes(x=REGION, y = covid_mortality_rate)) + geom_point() + 
  theme(axis.text.x = element_text(angle=60, hjust=1))

##Filtered State Specific Data New York

ny_data <- death_rate %>% filter(REGION == "New York")
glimpse(ny_data)
## Rows: 9
## Columns: 110
## $ Key                                                  <dbl> 94, 101, 206, ...
## $ REGION                                               <chr> "New York", "N...
## $ COUNTY                                               <chr> "Queens", "Wes...
## $ Population                                           <dbl> 2278906, 96761...
## $ SEX_Male                                             <dbl> 0.485, 0.485, ...
## $ SEX_Female                                           <dbl> 0.515, 0.515, ...
## $ RACE_One_Race                                        <dbl> 0.962, 0.970, ...
## $ RACE_White                                           <dbl> 0.372, 0.631, ...
## $ RACE_Black                                           <dbl> 0.184, 0.149, ...
## $ `RACE_American_Indian_ Alaska_Native`                <dbl> 0.004, 0.003, ...
## $ RACE_Asian                                           <dbl> 0.258, 0.061, ...
## $ RACE_Native_Hawaiian_Other_Pacific_Islander          <dbl> 0.000, 0.000, ...
## $ `RACE_ Other`                                        <dbl> 0.143, 0.126, ...
## $ RACE_TWO_PLUS                                        <dbl> 0.038, 0.030, ...
## $ RACE_Hispanic_Latino                                 <dbl> 0.281, 0.251, ...
## $ White_Alone                                          <dbl> 0.247, 0.529, ...
## $ LANGUAGE_Population_5_Years_and_Over                 <dbl> 2137881, 91387...
## $ LANGUAGE_English_Only                                <dbl> 0.436, 0.654, ...
## $ `LANGUAGE_Other_Than_ English`                       <dbl> 0.564, 0.346, ...
## $ LANGUAGE_Limited_English                             <dbl> 0.294, 0.124, ...
## $ INCOME_12_MONTHS_Population                          <dbl> 788110, 352498...
## $ INCOME_12_MONTHS_With_Earnings                       <dbl> 0.809, 0.794, ...
## $ INCOME_12_MONTHS_Mean_Dollars_Earned                 <dbl> 93677, 155345,...
## $ INCOME_12_MONTHS_WITH_Social_Security                <dbl> 0.288, 0.328, ...
## $ INCOME_12_MONTHS_SS_Mean_Dollars                     <dbl> 18027, 21926, ...
## $ INCOME_12_MONTHS_With_Suppl_Security_Income          <dbl> 0.056, 0.049, ...
## $ INCOME_12_MONTHS_Mean_Suppl_Sec_Dollars              <dbl> 9592, 9638, 88...
## $ INCOME_12_MONTHS_With_Cash_Public_Assistance         <dbl> 0.028, 0.021, ...
## $ INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars <dbl> 3196, 3975, 32...
## $ INCOME_12_MONTHS_With_Retirement_Income              <dbl> 0.152, 0.194, ...
## $ INCOME_12_MONTHS_Mean_Retirement_Income_Dollars      <dbl> 26479, 40134, ...
## $ INCOME_12_MONTHS_With_Food_Stamps                    <dbl> 0.130, 0.091, ...
## $ POVERTY_STATUS_Population                            <dbl> 2250553, 94387...
## $ POVERTY_STATUS_Below_100_Percent_Level               <dbl> 0.115, 0.082, ...
## $ POVERTY_STATUS_100_to_149_Percent_Level              <dbl> 0.090, 0.059, ...
## $ POVERTY_STATUS_Above_150_Percent_Level               <dbl> 0.795, 0.860, ...
## $ STATEFP                                              <dbl> 36, 36, 36, 36...
## $ CountyName                                           <chr> "Queens", "Wes...
## $ State                                                <chr> "New York", "N...
## $ lat                                                  <dbl> 40.68353, 41.1...
## $ lon                                                  <dbl> -73.81471, -73...
## $ PopulationEstimate2018                               <dbl> 2278906, 96761...
## $ PopTotalMale2017                                     <dbl> 1143190, 47493...
## $ PopTotalFemale2017                                   <dbl> 1215392, 50530...
## $ `PopulationEstimate65+2017`                          <dbl> 354809, 162696...
## $ CensusPopulation2010                                 <dbl> 2230722, 94911...
## $ MedianAge2010                                        <dbl> 37.2, 40.0, 32...
## $ dem_to_rep_ratio                                     <dbl> 3.4633490, 2.0...
## $ `#Hospitals`                                         <dbl> 6, 9, 6, 6, 10...
## $ `#ICU_beds`                                          <dbl> 129, 179, 270,...
## $ pop_fmless5                                          <dbl> 64573, 27880, ...
## $ pop_mless5                                           <dbl> 67891, 29319, ...
## $ pop_fm5to9                                           <dbl> 60360, 30949, ...
## $ pop_m5to9                                            <dbl> 63406, 32263, ...
## $ pop_m10to14                                          <dbl> 62931, 33569, ...
## $ pop_fm10to14                                         <dbl> 60475, 32111, ...
## $ pop_m15to19                                          <dbl> 71846, 33615, ...
## $ pop_fm15to19                                         <dbl> 67250, 31701, ...
## $ pop_m20to24                                          <dbl> 81879, 27464, ...
## $ pop_fm20to24                                         <dbl> 78996, 26116, ...
## $ pop_m25to29                                          <dbl> 91793, 26424, ...
## $ pop_fm25to29                                         <dbl> 93124, 26230, ...
## $ pop_m30to34                                          <dbl> 88286, 27104, ...
## $ pop_fm30to34                                         <dbl> 88927, 28255, ...
## $ pop_m35to44                                          <dbl> 162445, 63991,...
## $ pop_fm35to44                                         <dbl> 163834, 68993,...
## $ pop_m45to54                                          <dbl> 156130, 71688,...
## $ pop_fm45to54                                         <dbl> 166754, 77344,...
## $ pop_m55to59                                          <dbl> 64318, 29234, ...
## $ pop_fm55to59                                         <dbl> 72866, 32554, ...
## $ pop_m60to64                                          <dbl> 53310, 25009, ...
## $ pop_fm60to64                                         <dbl> 63182, 28178, ...
## $ pop_m65to74                                          <dbl> 65634, 30471, ...
## $ pop_fm65to74                                         <dbl> 84990, 38295, ...
## $ pop_m75to84                                          <dbl> 36510, 19363, ...
## $ pop_fm75to84                                         <dbl> 56837, 28266, ...
## $ pop_mover84                                          <dbl> 13424, 7147, 5...
## $ pop_fmover84                                         <dbl> 28751, 15580, ...
## $ SVIPercentile                                        <dbl> 0.6201, 0.5516...
## $ tot_deaths                                           <dbl> 7282, 1463, 49...
## $ tot_cases                                            <dbl> 74632, 39200, ...
## $ proportion_fmless_5                                  <dbl> 0.02894713, 0....
## $ proportion_mless_5                                   <dbl> 0.03043454, 0....
## $ proportion_fm5to9                                    <dbl> 0.02705850, 0....
## $ proportion_m5to9                                     <dbl> 0.02842398, 0....
## $ proportion_m10to14                                   <dbl> 0.02821105, 0....
## $ proportion_fm10to14                                  <dbl> 0.02711006, 0....
## $ proportion_m15to19                                   <dbl> 0.03220751, 0....
## $ proportion_fm15to19                                  <dbl> 0.03014719, 0....
## $ proportion_m20to24                                   <dbl> 0.03670516, 0....
## $ proportion_fm20to24                                  <dbl> 0.03541275, 0....
## $ proportion_m25to29                                   <dbl> 0.04114946, 0....
## $ proportion_fm25to29                                  <dbl> 0.04114946, 0....
## $ proportion_m30to34                                   <dbl> 0.03957732, 0....
## $ proportion_fm30to34                                  <dbl> 0.03986467, 0....
## $ proportion_m35to44                                   <dbl> 0.07282171, 0....
## $ proportion_fm35to44                                  <dbl> 0.07282171, 0....
## $ proportion_m45to54                                   <dbl> 0.06999079, 0....
## $ proportion_fm45to54                                  <dbl> 0.07475338, 0....
## $ proportion_m55to59                                   <dbl> 0.02883282, 0....
## $ proportion_fm55to59                                  <dbl> 0.03266476, 0....
## $ proportion_m60to64                                   <dbl> 0.02389809, 0....
## $ proportion_fm60to64                                  <dbl> 0.02832357, 0....
## $ proportion_m65to74                                   <dbl> 0.02942276, 0....
## $ proportion_fm65to74                                  <dbl> 0.03809977, 0....
## $ proportion_m75to84                                   <dbl> 0.01636690, 0....
## $ proportion_fm75to84                                  <dbl> 0.02547919, 0....
## $ proportion_mover84                                   <dbl> 0.006017783, 0...
## $ proportion_fmover84                                  <dbl> 0.01288865, 0....
## $ covid_mortality_rate                                 <dbl> 0.09757209, 0....
write.csv(ny_data,"C:\\Users\\User\\Desktop\\nydata.csv", row.names = FALSE)

NY Covid Mortality Rate

ny_covid_mortality <- ny_data %>% select(COUNTY, tot_cases, tot_deaths, covid_mortality_rate)
write.csv(ny_covid_mortality,"C:\\Users\\User\\Desktop\\ny_covid_mortality.csv", row.names = FALSE)

Michigan

mi_data <- death_rate %>% filter(REGION == "Michigan")
glimpse(mi_data)
## Rows: 4
## Columns: 110
## $ Key                                                  <dbl> 66, 353, 472, 934
## $ REGION                                               <chr> "Michigan", "M...
## $ COUNTY                                               <chr> "Wayne", "Maco...
## $ Population                                           <dbl> 1753893, 87475...
## $ SEX_Male                                             <dbl> 0.482, 0.487, ...
## $ SEX_Female                                           <dbl> 0.518, 0.513, ...
## $ RACE_One_Race                                        <dbl> 0.974, 0.977, ...
## $ RACE_White                                           <dbl> 0.531, 0.805, ...
## $ RACE_Black                                           <dbl> 0.385, 0.121, ...
## $ `RACE_American_Indian_ Alaska_Native`                <dbl> 0.003, 0.003, ...
## $ RACE_Asian                                           <dbl> 0.034, 0.042, ...
## $ RACE_Native_Hawaiian_Other_Pacific_Islander          <dbl> 0.000, 0.000, ...
## $ `RACE_ Other`                                        <dbl> 0.020, 0.006, ...
## $ RACE_TWO_PLUS                                        <dbl> 0.026, 0.023, ...
## $ RACE_Hispanic_Latino                                 <dbl> 0.061, 0.027, ...
## $ White_Alone                                          <dbl> 0.493, 0.785, ...
## $ LANGUAGE_Population_5_Years_and_Over                 <dbl> 1639143, 82711...
## $ LANGUAGE_English_Only                                <dbl> 0.847, 0.857, ...
## $ `LANGUAGE_Other_Than_ English`                       <dbl> 0.153, 0.143, ...
## $ LANGUAGE_Limited_English                             <dbl> 0.057, 0.056, ...
## $ INCOME_12_MONTHS_Population                          <dbl> 687546, 347508...
## $ INCOME_12_MONTHS_With_Earnings                       <dbl> 0.706, 0.768, ...
## $ INCOME_12_MONTHS_Mean_Dollars_Earned                 <dbl> 72076, 80654, ...
## $ INCOME_12_MONTHS_WITH_Social_Security                <dbl> 0.340, 0.343, ...
## $ INCOME_12_MONTHS_SS_Mean_Dollars                     <dbl> 18310, 20118, ...
## $ INCOME_12_MONTHS_With_Suppl_Security_Income          <dbl> 0.089, 0.059, ...
## $ INCOME_12_MONTHS_Mean_Suppl_Sec_Dollars              <dbl> 9634, 10308, 1...
## $ INCOME_12_MONTHS_With_Cash_Public_Assistance         <dbl> 0.026, 0.019, ...
## $ INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars <dbl> 2385, 1956, 46...
## $ INCOME_12_MONTHS_With_Retirement_Income              <dbl> 0.208, 0.223, ...
## $ INCOME_12_MONTHS_Mean_Retirement_Income_Dollars      <dbl> 23556, 22089, ...
## $ INCOME_12_MONTHS_With_Food_Stamps                    <dbl> 0.223, 0.108, ...
## $ POVERTY_STATUS_Population                            <dbl> 1733848, 86622...
## $ POVERTY_STATUS_Below_100_Percent_Level               <dbl> 0.216, 0.111, ...
## $ POVERTY_STATUS_100_to_149_Percent_Level              <dbl> 0.100, 0.074, ...
## $ POVERTY_STATUS_Above_150_Percent_Level               <dbl> 0.684, 0.815, ...
## $ STATEFP                                              <dbl> 26, 26, 26, 26
## $ CountyName                                           <chr> "Wayne", "Maco...
## $ State                                                <chr> "Michigan", "M...
## $ lat                                                  <dbl> 42.29537, 42.7...
## $ lon                                                  <dbl> -83.28590, -82...
## $ PopulationEstimate2018                               <dbl> 1753893, 87475...
## $ PopTotalMale2017                                     <dbl> 844123, 424217...
## $ PopTotalFemale2017                                   <dbl> 909493, 447158...
## $ `PopulationEstimate65+2017`                          <dbl> 265150, 144709...
## $ CensusPopulation2010                                 <dbl> 1820584, 84097...
## $ MedianAge2010                                        <dbl> 37.3, 39.9, 34...
## $ dem_to_rep_ratio                                     <dbl> 2.2683837, 0.7...
## $ `#Hospitals`                                         <dbl> 14, 4, 3, 12
## $ `#ICU_beds`                                          <dbl> 586, 122, 150,...
## $ pop_fmless5                                          <dbl> 58317, 23876, ...
## $ pop_mless5                                           <dbl> 60133, 24939, ...
## $ pop_fm5to9                                           <dbl> 60693, 25757, ...
## $ pop_m5to9                                            <dbl> 63411, 27001, ...
## $ pop_m10to14                                          <dbl> 66963, 28924, ...
## $ pop_fm10to14                                         <dbl> 64157, 27321, ...
## $ pop_m15to19                                          <dbl> 73765, 29301, ...
## $ pop_fm15to19                                         <dbl> 71148, 27506, ...
## $ pop_m20to24                                          <dbl> 59269, 24979, ...
## $ pop_fm20to24                                         <dbl> 60711, 24532, ...
## $ pop_m25to29                                          <dbl> 52614, 24719, ...
## $ pop_fm25to29                                         <dbl> 56486, 25270, ...
## $ pop_m30to34                                          <dbl> 52457, 25339, ...
## $ pop_fm30to34                                         <dbl> 57236, 26065, ...
## $ pop_m35to44                                          <dbl> 118465, 58382,...
## $ pop_fm35to44                                         <dbl> 126427, 60057,...
## $ pop_m45to54                                          <dbl> 130304, 65341,...
## $ pop_fm45to54                                         <dbl> 138695, 67514,...
## $ pop_m55to59                                          <dbl> 57367, 27640, ...
## $ pop_fm55to59                                         <dbl> 63055, 29006, ...
## $ pop_m60to64                                          <dbl> 45829, 22487, ...
## $ pop_fm60to64                                         <dbl> 52379, 24842, ...
## $ pop_m65to74                                          <dbl> 52156, 27670, ...
## $ pop_fm65to74                                         <dbl> 65399, 33422, ...
## $ pop_m75to84                                          <dbl> 30602, 16363, ...
## $ pop_fm75to84                                         <dbl> 48227, 24440, ...
## $ pop_mover84                                          <dbl> 10826, 5627, 3...
## $ pop_fmover84                                         <dbl> 23493, 12658, ...
## $ SVIPercentile                                        <dbl> 0.8723, 0.3296...
## $ tot_deaths                                           <dbl> 3017, 1055, 17...
## $ tot_cases                                            <dbl> 37702, 16548, ...
## $ proportion_fmless_5                                  <dbl> 0.03203203, 0....
## $ proportion_mless_5                                   <dbl> 0.03302951, 0....
## $ proportion_fm5to9                                    <dbl> 0.03333711, 0....
## $ proportion_m5to9                                     <dbl> 0.03483003, 0....
## $ proportion_m10to14                                   <dbl> 0.03678105, 0....
## $ proportion_fm10to14                                  <dbl> 0.03523979, 0....
## $ proportion_m15to19                                   <dbl> 0.04051722, 0....
## $ proportion_fm15to19                                  <dbl> 0.03907977, 0....
## $ proportion_m20to24                                   <dbl> 0.03255494, 0....
## $ proportion_fm20to24                                  <dbl> 0.03334699, 0....
## $ proportion_m25to29                                   <dbl> 0.02889952, 0....
## $ proportion_fm25to29                                  <dbl> 0.02889952, 0....
## $ proportion_m30to34                                   <dbl> 0.02881328, 0....
## $ proportion_fm30to34                                  <dbl> 0.03143826, 0....
## $ proportion_m35to44                                   <dbl> 0.06506978, 0....
## $ proportion_fm35to44                                  <dbl> 0.06506978, 0....
## $ proportion_m45to54                                   <dbl> 0.07157264, 0....
## $ proportion_fm45to54                                  <dbl> 0.07618160, 0....
## $ proportion_m55to59                                   <dbl> 0.03151022, 0....
## $ proportion_fm55to59                                  <dbl> 0.03463449, 0....
## $ proportion_m60to64                                   <dbl> 0.02517269, 0....
## $ proportion_fm60to64                                  <dbl> 0.02877044, 0....
## $ proportion_m65to74                                   <dbl> 0.02864795, 0....
## $ proportion_fm65to74                                  <dbl> 0.03592199, 0....
## $ proportion_m75to84                                   <dbl> 0.01680889, 0....
## $ proportion_fm75to84                                  <dbl> 0.02648985, 0....
## $ proportion_mover84                                   <dbl> 0.005946444, 0...
## $ proportion_fmover84                                  <dbl> 0.01290410, 0....
## $ covid_mortality_rate                                 <dbl> 0.08002228, 0....
write.csv(mi_data,"C:\\Users\\User\\Desktop\\midata.csv", row.names = FALSE)

California

ca_data <- death_rate %>% filter(REGION == "California")
glimpse(ca_data)
## Rows: 16
## Columns: 110
## $ Key                                                  <dbl> 3, 10, 185, 30...
## $ REGION                                               <chr> "California", ...
## $ COUNTY                                               <chr> "Orange", "Sac...
## $ Population                                           <dbl> 3185968, 15409...
## $ SEX_Male                                             <dbl> 0.494, 0.489, ...
## $ SEX_Female                                           <dbl> 0.506, 0.511, ...
## $ RACE_One_Race                                        <dbl> 0.958, 0.925, ...
## $ RACE_White                                           <dbl> 0.604, 0.567, ...
## $ RACE_Black                                           <dbl> 0.019, 0.099, ...
## $ `RACE_American_Indian_ Alaska_Native`                <dbl> 0.004, 0.007, ...
## $ RACE_Asian                                           <dbl> 0.209, 0.161, ...
## $ RACE_Native_Hawaiian_Other_Pacific_Islander          <dbl> 0.003, 0.012, ...
## $ `RACE_ Other`                                        <dbl> 0.119, 0.078, ...
## $ RACE_TWO_PLUS                                        <dbl> 0.042, 0.075, ...
## $ RACE_Hispanic_Latino                                 <dbl> 0.342, 0.234, ...
## $ White_Alone                                          <dbl> 0.399, 0.441, ...
## $ LANGUAGE_Population_5_Years_and_Over                 <dbl> 2998544, 14421...
## $ LANGUAGE_English_Only                                <dbl> 0.545, 0.660, ...
## $ `LANGUAGE_Other_Than_ English`                       <dbl> 0.455, 0.340, ...
## $ LANGUAGE_Limited_English                             <dbl> 0.187, 0.130, ...
## $ INCOME_12_MONTHS_Population                          <dbl> 1040394, 54356...
## $ INCOME_12_MONTHS_With_Earnings                       <dbl> 0.820, 0.787, ...
## $ INCOME_12_MONTHS_Mean_Dollars_Earned                 <dbl> 121791, 90253,...
## $ INCOME_12_MONTHS_WITH_Social_Security                <dbl> 0.271, 0.282, ...
## $ INCOME_12_MONTHS_SS_Mean_Dollars                     <dbl> 20360, 18273, ...
## $ INCOME_12_MONTHS_With_Suppl_Security_Income          <dbl> 0.044, 0.075, ...
## $ INCOME_12_MONTHS_Mean_Suppl_Sec_Dollars              <dbl> 9885, 10304, 9...
## $ INCOME_12_MONTHS_With_Cash_Public_Assistance         <dbl> 0.020, 0.038, ...
## $ INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars <dbl> 4095, 5008, 37...
## $ INCOME_12_MONTHS_With_Retirement_Income              <dbl> 0.159, 0.202, ...
## $ INCOME_12_MONTHS_Mean_Retirement_Income_Dollars      <dbl> 37804, 36985, ...
## $ INCOME_12_MONTHS_With_Food_Stamps                    <dbl> 0.059, 0.106, ...
## $ POVERTY_STATUS_Population                            <dbl> 3148187, 15227...
## $ POVERTY_STATUS_Below_100_Percent_Level               <dbl> 0.105, 0.143, ...
## $ POVERTY_STATUS_100_to_149_Percent_Level              <dbl> 0.066, 0.091, ...
## $ POVERTY_STATUS_Above_150_Percent_Level               <dbl> 0.829, 0.767, ...
## $ STATEFP                                              <dbl> 6, 6, 6, 6, 6,...
## $ CountyName                                           <chr> "Orange", "Sac...
## $ State                                                <chr> "California", ...
## $ lat                                                  <dbl> 33.71696, 38.4...
## $ lon                                                  <dbl> -117.7533, -12...
## $ PopulationEstimate2018                               <dbl> 3185968, 15409...
## $ PopTotalMale2017                                     <dbl> 1574674, 74805...
## $ PopTotalFemale2017                                   <dbl> 1615726, 78255...
## $ `PopulationEstimate65+2017`                          <dbl> 456229, 209612...
## $ CensusPopulation2010                                 <dbl> 3010232, 14187...
## $ MedianAge2010                                        <dbl> 36.2, 34.8, 36...
## $ dem_to_rep_ratio                                     <dbl> 1.202728, 1.71...
## $ `#Hospitals`                                         <dbl> 24, 8, 7, 7, 7...
## $ `#ICU_beds`                                          <dbl> 651, 396, 139,...
## $ pop_fmless5                                          <dbl> 93359, 49394, ...
## $ pop_mless5                                           <dbl> 98332, 51669, ...
## $ pop_fm5to9                                           <dbl> 97192, 48090, ...
## $ pop_m5to9                                            <dbl> 101577, 50022,...
## $ pop_m10to14                                          <dbl> 107446, 51128,...
## $ pop_fm10to14                                         <dbl> 102749, 48692,...
## $ pop_m15to19                                          <dbl> 117278, 54270,...
## $ pop_fm15to19                                         <dbl> 110411, 51410,...
## $ pop_m20to24                                          <dbl> 110168, 51147,...
## $ pop_fm20to24                                         <dbl> 103433, 50761,...
## $ pop_m25to29                                          <dbl> 110805, 53768,...
## $ pop_fm25to29                                         <dbl> 104557, 54154,...
## $ pop_m30to34                                          <dbl> 100050, 49407,...
## $ pop_fm30to34                                         <dbl> 98116, 49317, ...
## $ pop_m35to44                                          <dbl> 216641, 95020,...
## $ pop_fm35to44                                         <dbl> 222402, 95815,...
## $ pop_m45to54                                          <dbl> 220283, 97859,...
## $ pop_fm45to54                                         <dbl> 223902, 102677...
## $ pop_m55to59                                          <dbl> 84699, 40354, ...
## $ pop_fm55to59                                         <dbl> 90428, 44978, ...
## $ pop_m60to64                                          <dbl> 70010, 33113, ...
## $ pop_fm60to64                                         <dbl> 76717, 37192, ...
## $ pop_m65to74                                          <dbl> 86857, 37285, ...
## $ pop_fm65to74                                         <dbl> 100597, 46010,...
## $ pop_m75to84                                          <dbl> 47665, 21881, ...
## $ pop_fm75to84                                         <dbl> 65038, 30312, ...
## $ pop_mover84                                          <dbl> 16969, 7870, 4...
## $ pop_fmover84                                         <dbl> 32551, 15193, ...
## $ SVIPercentile                                        <dbl> 0.4191, 0.7338...
## $ tot_deaths                                           <dbl> 1391, 464, 159...
## $ tot_cases                                            <dbl> 56436, 24102, ...
## $ proportion_fmless_5                                  <dbl> 0.03101389, 0....
## $ proportion_mless_5                                   <dbl> 0.03266592, 0....
## $ proportion_fm5to9                                    <dbl> 0.03228721, 0....
## $ proportion_m5to9                                     <dbl> 0.03374391, 0....
## $ proportion_m10to14                                   <dbl> 0.03569359, 0....
## $ proportion_fm10to14                                  <dbl> 0.03413325, 0....
## $ proportion_m15to19                                   <dbl> 0.03895979, 0....
## $ proportion_fm15to19                                  <dbl> 0.03667857, 0....
## $ proportion_m20to24                                   <dbl> 0.03659784, 0....
## $ proportion_fm20to24                                  <dbl> 0.03436047, 0....
## $ proportion_m25to29                                   <dbl> 0.03680946, 0....
## $ proportion_fm25to29                                  <dbl> 0.03680946, 0....
## $ proportion_m30to34                                   <dbl> 0.03323664, 0....
## $ proportion_fm30to34                                  <dbl> 0.03259417, 0....
## $ proportion_m35to44                                   <dbl> 0.07196821, 0....
## $ proportion_fm35to44                                  <dbl> 0.07196821, 0....
## $ proportion_m45to54                                   <dbl> 0.07317808, 0....
## $ proportion_fm45to54                                  <dbl> 0.07438031, 0....
## $ proportion_m55to59                                   <dbl> 0.02813703, 0....
## $ proportion_fm55to59                                  <dbl> 0.03004021, 0....
## $ proportion_m60to64                                   <dbl> 0.02325734, 0....
## $ proportion_fm60to64                                  <dbl> 0.02548541, 0....
## $ proportion_m65to74                                   <dbl> 0.02885392, 0....
## $ proportion_fm65to74                                  <dbl> 0.03341835, 0....
## $ proportion_m75to84                                   <dbl> 0.01583433, 0....
## $ proportion_fm75to84                                  <dbl> 0.02160564, 0....
## $ proportion_mover84                                   <dbl> 0.005637107, 0...
## $ proportion_fmover84                                  <dbl> 0.010813452, 0...
## $ covid_mortality_rate                                 <dbl> 0.02464739, 0....

##Race

#Race by County and State

This Scatter plot gives a basic visualization of distribution of race populations in different states. Of particular interest is California, New York, Texas, Florida, and Maryland. These states have an ample number of county data recorded and show a variable distribution of races. Focused on New York due to its significance, and distribution of races.

ggplot(race_populations) + geom_point(aes(x=REGION, y = pop_white), color = "gray") +
  geom_point(aes(x=REGION, y = pop_black), color = "black") + 
  geom_point(aes(x=REGION, y = pop_amer_indian_alaska_native), color = "green") +
  geom_point(aes(x=REGION, y = pop_asian), color = "gold") +
  geom_point(aes(x=REGION, y = pop_native_hawaiian_pacific_islander), color = "blue") + 
  geom_point(aes(x=REGION, y = pop_other), color = "purple") +
  theme(axis.text.x = element_text(angle=60, hjust=1))

Created NY race populations

ny_demographics_race <- ny_data %>% 
  select(COUNTY, Population, RACE_One_Race, RACE_White, RACE_Black, `RACE_American_Indian_ Alaska_Native`, RACE_Asian, RACE_Native_Hawaiian_Other_Pacific_Islander, `RACE_ Other`, RACE_TWO_PLUS, RACE_Hispanic_Latino, White_Alone) %>%
  mutate(pop_white = RACE_White*Population, pop_black = RACE_Black*Population, pop_amer_indian_alaska_native = `RACE_American_Indian_ Alaska_Native`*Population, pop_asian = RACE_Asian*Population, pop_native_hawaiian_pacific_islander = RACE_Native_Hawaiian_Other_Pacific_Islander*Population, pop_other = `RACE_ Other`*Population, pop_hispanic_latino = RACE_Hispanic_Latino*Population)

glimpse(ny_demographics_race)
## Rows: 9
## Columns: 19
## $ COUNTY                                      <chr> "Queens", "Westchester"...
## $ Population                                  <dbl> 2278906, 967612, 143213...
## $ RACE_One_Race                               <dbl> 0.962, 0.970, 0.957, 0....
## $ RACE_White                                  <dbl> 0.372, 0.631, 0.225, 0....
## $ RACE_Black                                  <dbl> 0.184, 0.149, 0.356, 0....
## $ `RACE_American_Indian_ Alaska_Native`       <dbl> 0.004, 0.003, 0.006, 0....
## $ RACE_Asian                                  <dbl> 0.258, 0.061, 0.038, 0....
## $ RACE_Native_Hawaiian_Other_Pacific_Islander <dbl> 0.000, 0.000, 0.001, 0....
## $ `RACE_ Other`                               <dbl> 0.143, 0.126, 0.331, 0....
## $ RACE_TWO_PLUS                               <dbl> 0.038, 0.030, 0.043, 0....
## $ RACE_Hispanic_Latino                        <dbl> 0.281, 0.251, 0.564, 0....
## $ White_Alone                                 <dbl> 0.247, 0.529, 0.089, 0....
## $ pop_white                                   <dbl> 847753.0, 610563.2, 322...
## $ pop_black                                   <dbl> 419318.7, 144174.2, 509...
## $ pop_amer_indian_alaska_native               <dbl> 9115.624, 2902.836, 859...
## $ pop_asian                                   <dbl> 587957.75, 59024.33, 54...
## $ pop_native_hawaiian_pacific_islander        <dbl> 0.000, 0.000, 1432.132,...
## $ pop_other                                   <dbl> 325883.56, 121919.11, 4...
## $ pop_hispanic_latino                         <dbl> 640372.59, 242870.61, 8...
write.csv(ny_demographics_race,"C:\\Users\\User\\Desktop\\ny_demographic_race.csv", row.names = FALSE)

Race by NY County Included plot of covid mortality rate by county. The Bronx, Kings, New York, and Queens all have higher mortality rates. They are all relatively close - centered around New York City. Erie and Monroe are both near larger cities (Buffalo and Rochester, respectively) but Nassau, Suffolk, and Westchester are all located near Long Island. What are the differences between both of these New York City counties?

Distributions of each race category show higher populations of White folks in all counties with Bronx and Monroe being the lowest. Bronx and Kings county have high populations of black, hispanic/latino, native hawaiian/pacific islander, and ‘other’ folks.

ggplot(ny_demographics_race, aes(x=COUNTY, y=pop_white)) + geom_col(position = "dodge")

ggplot(ny_demographics_race, aes(x=COUNTY, y=pop_black)) + geom_col(position = "dodge", fill = 'coral')

ggplot(ny_demographics_race, aes(x=COUNTY, y=pop_amer_indian_alaska_native)) + geom_col(position = "dodge", fill = "steel blue")

ggplot(ny_demographics_race, aes(x=COUNTY, y=pop_asian)) + geom_col(position = "dodge", fill = "light green")

ggplot(ny_demographics_race, aes(x=COUNTY, y=pop_native_hawaiian_pacific_islander)) + geom_col(position = "dodge", fill = "pink")

ggplot(ny_demographics_race, aes(x=COUNTY, y=pop_other)) + geom_col(position = "dodge", fill = "gold")

ggplot(ny_demographics_race, aes(x=COUNTY, y=pop_hispanic_latino)) + geom_col(position = "dodge", fill = "orange")

Michigan Race

mi_demographics_race <- mi_data %>% 
  select(COUNTY, Population, RACE_One_Race, RACE_White, RACE_Black, `RACE_American_Indian_ Alaska_Native`, RACE_Asian, RACE_Native_Hawaiian_Other_Pacific_Islander, `RACE_ Other`, RACE_TWO_PLUS, RACE_Hispanic_Latino, White_Alone) %>%
  mutate(pop_white = RACE_White*Population, pop_black = RACE_Black*Population, pop_amer_indian_alaska_native = `RACE_American_Indian_ Alaska_Native`*Population, pop_asian = RACE_Asian*Population, pop_native_hawaiian_pacific_islander = RACE_Native_Hawaiian_Other_Pacific_Islander*Population, pop_other = `RACE_ Other`*Population, pop_hispanic_latino = RACE_Hispanic_Latino*Population)

glimpse(mi_demographics_race)
## Rows: 4
## Columns: 19
## $ COUNTY                                      <chr> "Wayne", "Macomb", "Ken...
## $ Population                                  <dbl> 1753893, 874759, 653786...
## $ RACE_One_Race                               <dbl> 0.974, 0.977, 0.958, 0.975
## $ RACE_White                                  <dbl> 0.531, 0.805, 0.795, 0.747
## $ RACE_Black                                  <dbl> 0.385, 0.121, 0.096, 0.139
## $ `RACE_American_Indian_ Alaska_Native`       <dbl> 0.003, 0.003, 0.004, 0.002
## $ RACE_Asian                                  <dbl> 0.034, 0.042, 0.032, 0.078
## $ RACE_Native_Hawaiian_Other_Pacific_Islander <dbl> 0.000, 0.000, 0.001, 0.000
## $ `RACE_ Other`                               <dbl> 0.020, 0.006, 0.030, 0.008
## $ RACE_TWO_PLUS                               <dbl> 0.026, 0.023, 0.042, 0.025
## $ RACE_Hispanic_Latino                        <dbl> 0.061, 0.027, 0.107, 0.042
## $ White_Alone                                 <dbl> 0.493, 0.785, 0.734, 0.715
## $ pop_white                                   <dbl> 931317.2, 704181.0, 519...
## $ pop_black                                   <dbl> 675248.81, 105845.84, 6...
## $ pop_amer_indian_alaska_native               <dbl> 5261.679, 2624.277, 261...
## $ pop_asian                                   <dbl> 59632.36, 36739.88, 209...
## $ pop_native_hawaiian_pacific_islander        <dbl> 0.000, 0.000, 653.786, ...
## $ pop_other                                   <dbl> 35077.860, 5248.554, 19...
## $ pop_hispanic_latino                         <dbl> 106987.47, 23618.49, 69...
write.csv(mi_demographics_race,"C:\\Users\\User\\Desktop\\ny_demographic_race.csv", row.names = FALSE)

Race by MI County Reviewing race distributions in MI county shows Wayne to have the highest populations of the various races, with the highest population of black folks.

ggplot(mi_data, aes(x=COUNTY, y=covid_mortality_rate)) + geom_point()

ggplot(mi_demographics_race, aes(x=COUNTY, y=pop_white)) + geom_col(position = "dodge")

ggplot(mi_demographics_race, aes(x=COUNTY, y=pop_black)) + geom_col(position = "dodge", fill = 'coral')

ggplot(mi_demographics_race, aes(x=COUNTY, y=pop_amer_indian_alaska_native)) + geom_col(position = "dodge", fill = "steel blue")

ggplot(mi_demographics_race, aes(x=COUNTY, y=pop_asian)) + geom_col(position = "dodge", fill = "light green")

ggplot(mi_demographics_race, aes(x=COUNTY, y=pop_native_hawaiian_pacific_islander)) + geom_col(position = "dodge", fill = "pink")

ggplot(mi_demographics_race, aes(x=COUNTY, y=pop_other)) + geom_col(position = "dodge", fill = "gold")

ggplot(mi_demographics_race, aes(x=COUNTY, y=pop_hispanic_latino)) + geom_col(position = "dodge", fill = "orange")

California Race

ca_demographics_race <- ca_data %>% 
  select(COUNTY, Population, RACE_One_Race, RACE_White, RACE_Black, `RACE_American_Indian_ Alaska_Native`, RACE_Asian, RACE_Native_Hawaiian_Other_Pacific_Islander, `RACE_ Other`, RACE_TWO_PLUS, RACE_Hispanic_Latino, White_Alone) %>%
  mutate(pop_white = RACE_White*Population, pop_black = RACE_Black*Population, pop_amer_indian_alaska_native = `RACE_American_Indian_ Alaska_Native`*Population, pop_asian = RACE_Asian*Population, pop_native_hawaiian_pacific_islander = RACE_Native_Hawaiian_Other_Pacific_Islander*Population, pop_other = `RACE_ Other`*Population, pop_hispanic_latino = RACE_Hispanic_Latino*Population)

glimpse(ca_demographics_race)
## Rows: 16
## Columns: 19
## $ COUNTY                                      <chr> "Orange", "Sacramento",...
## $ Population                                  <dbl> 3185968, 1540975, 85096...
## $ RACE_One_Race                               <dbl> 0.958, 0.925, 0.954, 0....
## $ RACE_White                                  <dbl> 0.604, 0.567, 0.806, 0....
## $ RACE_Black                                  <dbl> 0.019, 0.099, 0.017, 0....
## $ `RACE_American_Indian_ Alaska_Native`       <dbl> 0.004, 0.007, 0.006, 0....
## $ RACE_Asian                                  <dbl> 0.209, 0.161, 0.076, 0....
## $ RACE_Native_Hawaiian_Other_Pacific_Islander <dbl> 0.003, 0.012, 0.002, 0....
## $ `RACE_ Other`                               <dbl> 0.119, 0.078, 0.047, 0....
## $ RACE_TWO_PLUS                               <dbl> 0.042, 0.075, 0.046, 0....
## $ RACE_Hispanic_Latino                        <dbl> 0.342, 0.234, 0.430, 0....
## $ White_Alone                                 <dbl> 0.399, 0.441, 0.449, 0....
## $ pop_white                                   <dbl> 1924324.7, 873732.8, 68...
## $ pop_black                                   <dbl> 60533.392, 152556.525, ...
## $ pop_amer_indian_alaska_native               <dbl> 12743.872, 10786.825, 5...
## $ pop_asian                                   <dbl> 665867.31, 248096.98, 6...
## $ pop_native_hawaiian_pacific_islander        <dbl> 9557.904, 18491.700, 17...
## $ pop_other                                   <dbl> 379130.19, 120196.05, 3...
## $ pop_hispanic_latino                         <dbl> 1089601.1, 360588.2, 36...
write.csv(ca_demographics_race,"C:\\Users\\User\\Desktop\\ca_demographic_race.csv", row.names = FALSE)

Race by CA County Reviewing distributions of race in California show consistency across counties.

ggplot(ca_data, aes(x=COUNTY, y=covid_mortality_rate)) + geom_point()

ggplot(ca_demographics_race, aes(x=COUNTY, y=pop_white)) + geom_col(position = "dodge") + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

ggplot(ca_demographics_race, aes(x=COUNTY, y=pop_black)) + geom_col(position = "dodge", fill = 'coral') + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

ggplot(ca_demographics_race, aes(x=COUNTY, y=pop_amer_indian_alaska_native)) + geom_col(position = "dodge", fill = "steel blue") + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

ggplot(ca_demographics_race, aes(x=COUNTY, y=pop_asian)) + geom_col(position = "dodge", fill = "light green") + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

ggplot(ca_demographics_race, aes(x=COUNTY, y=pop_native_hawaiian_pacific_islander)) + geom_col(position = "dodge", fill = "pink") + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

ggplot(ca_demographics_race, aes(x=COUNTY, y=pop_other)) + geom_col(position = "dodge", fill = "gold") + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

ggplot(ca_demographics_race, aes(x=COUNTY, y=pop_hispanic_latino)) + geom_col(position = "dodge", fill = "orange") + 
    theme(axis.text.x = element_text(angle=60, hjust=1))

#NY Regression Covid Mortality Rate and County Difficult to analyze this model due to NA values. This could be rectified with a larger sample of NY counties to examine covid mortality across the state.

model_death_ny <- lm(covid_mortality_rate ~ COUNTY, data = ny_data)
get_regression_table(model_death_ny)
## Warning in qt(a, object$df.residual): NaNs produced
get_regression_summaries(model_death_ny)
ggplot(ny_data, mapping = aes(y = RACE_White, x = covid_mortality_rate)) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#MI Regression Covid Mortality Rate and County A simliar instance with Michigan counties. A larger sample size would yield better results

model_death_mi <- lm(covid_mortality_rate ~ COUNTY, data = mi_data)
get_regression_table(model_death_mi)
## Warning in qt(a, object$df.residual): NaNs produced
get_regression_summaries(model_death_mi)
ggplot(mi_data, mapping = aes(y = RACE_White, x = covid_mortality_rate)) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#CA Regression Covid Mortality Rate and County Again, encountering a problem with small sample size.

model_death_ca <- lm(covid_mortality_rate ~ COUNTY, data = mi_data)
get_regression_table(model_death_ca)
## Warning in qt(a, object$df.residual): NaNs produced
get_regression_summaries(model_death_ca)
ggplot(ca_data, mapping = aes(y = RACE_White, x = covid_mortality_rate)) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#Nationwide Covid Mortality Rate and Race Basic Regression Nationwide Covid Mortality Rate and Race (white) This model shows an insignificant, weak (correlation -0.10) relationship between proportion of white folks and Covid mortality rate. As the proportion of White folks in counties increase, covid mortality rate decreases at a rate of -0.018. This is not a good model as shown by insignificance (0.23 < 0.05) and low R^2 value (0.011), only 1% of variance in values explained by the model.

model_death_white <- lm(RACE_White ~ covid_mortality_rate, data = death_rate)
get_regression_table(model_death_white)
get_regression_summaries(model_death_white)
ggplot(death_rate, aes(y= RACE_White, x=covid_mortality_rate)) + geom_point() + 
  geom_smooth(method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

cor(death_rate$RACE_White, death_rate$covid_mortality_rate)
## [1] -0.1044994

Basic Regression Nationwide Covid Mortality Rate and Race (black) This model shows an insignificant, weak (correlation 0.098) relationship between proportion of black folks and Covid mortality rate. As the proportion of black folks in counties increase, covid mortality rate increases at a rate of 0.021. This is not a good model as shown by insignificance (0.26 < 0.05) and low R^2 value (0.01), only 1% of variance in values explained by the model.

model_death_black <- lm(RACE_Black ~ covid_mortality_rate, data = death_rate)
get_regression_table(model_death_black)
get_regression_summaries(model_death_black)
cor(death_rate$RACE_Black, death_rate$covid_mortality_rate)
## [1] 0.09815762
ggplot(death_rate, aes(y= RACE_Black, x=covid_mortality_rate)) + geom_point() + 
  geom_smooth(method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Nationwide Covid Mortality Rate and Race (American Indian and Alaska Native) This model shows an significant (0.004 < 0.5), although weak (correlation -0.25) relationship between proportion of American Indian and Alaskan native folks and Covid mortality rate. As the proportion of American Indian and Alaska Native folks in counties increase, covid mortality rate increases at a rate of -0.801. Although significant, this is not a good model as shown by the low R^2 value (0.062), only 6% of variance in values explained by the model.

model_death_aian <- lm(covid_mortality_rate ~ `RACE_American_Indian_ Alaska_Native`, data = death_rate)
get_regression_table(model_death_aian)
get_regression_summaries(model_death_aian)
cor(death_rate$`RACE_American_Indian_ Alaska_Native`, death_rate$covid_mortality_rate)
## [1] -0.2498764
ggplot(death_rate, mapping = aes(y = `RACE_American_Indian_ Alaska_Native`, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Nationwide Covid Mortality Rate and Race (Asian) This model shows an insignificant (0.719 > 0.5), weak (correlation 0.031) relationship between proportion of Asian and Covid mortality rate. As the proportion of Asian folks in counties increase, covid mortality rate increases at a rate of -0.801. Although insignificant, this is not a good model as shown by the low R^2 value (0.062), only 0.1% of variance in values explained by the model.

model_death_asian <- lm(covid_mortality_rate ~ RACE_Asian, data = death_rate)
get_regression_table(model_death_asian)
get_regression_summaries(model_death_asian)
cor(death_rate$RACE_Asian, death_rate$covid_mortality_rate)
## [1] 0.03142102
ggplot(death_rate, mapping = aes(y = RACE_Asian, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Nationwide Covid Mortality Rate and Race (Native Hawaiian and Pacific Islander) This model shows a significant (0.001 < 0.5), although weak (correlation -0.277) relationship between proportion of Native Hawaiian, other, and pacific islander and Covid mortality rate. As the proportion of Native Hawaiian, other, and pacific islander folks in counties increase, covid mortality rate increases at a rate of -2.266. Although significant, this is not a good model as shown by the low R^2 value (0.062), only 7% of variance in values explained by the model.

model_death_nhopi <- lm(covid_mortality_rate ~ RACE_Native_Hawaiian_Other_Pacific_Islander, data = death_rate)
get_regression_table(model_death_nhopi)
get_regression_summaries(model_death_nhopi)
cor(death_rate$RACE_Native_Hawaiian_Other_Pacific_Islander, death_rate$covid_mortality_rate)
## [1] -0.2773393
ggplot(death_rate,mapping = aes(x = RACE_Native_Hawaiian_Other_Pacific_Islander, y = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Nationwide Covid Mortality Rate and Race (Other) This model shows an insignificant (0.068 > 0.5) weak (correlation 0.158) relationship between proportion of other races and Covid mortality rate. As the proportion of other folks in counties increase, covid mortality rate increases at a rate of 0.074. This is not a good model as shown by the low R^2 value (0.025), only 2% of variance in values explained by the model.

model_death_other <- lm(covid_mortality_rate ~ `RACE_ Other`, data = death_rate)
get_regression_table(model_death_other)
get_regression_summaries(model_death_other)
cor(death_rate$`RACE_ Other`, death_rate$covid_mortality_rate)
## [1] 0.1580032
ggplot(death_rate, mapping = aes(y = `RACE_ Other`, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Nationwide Covid Mortality Rate and Race (Hispanic and Latino) This model shows an insignificant (0.344 > 0.5) weak (correlation -0.082) relationship between proportion of Hispanic and Latino folks and Covid mortality rate. As the proportion of Hispanic and Latino folks in counties increase, covid mortality rate increases at a rate of -0.559. This is not a good model as shown by the low R^2 value (0.007), only .7% of variance in values explained by the model.

model_death_hispaniclatino <- lm(RACE_Hispanic_Latino ~ covid_mortality_rate, data = death_rate)
get_regression_table(model_death_hispaniclatino)
get_regression_summaries(model_death_hispaniclatino)
cor(death_rate$RACE_Hispanic_Latino, death_rate$covid_mortality_rate)
## [1] -0.08242166
ggplot(death_rate, mapping = aes(y = RACE_Hispanic_Latino, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#NY Regression Covid Mortality and Race County and Race (White) as interactions

model_death_counties_white_ny <- lm(covid_mortality_rate ~ COUNTY*RACE_White, data = ny_data)
get_regression_table(model_death_counties_white_ny)
## Warning in qt(a, object$df.residual): NaNs produced
get_regression_summaries(model_death_counties_white_ny)

County and Race (Black) as interactions

model_death_counties_black_ny <- lm(covid_mortality_rate ~ COUNTY*RACE_Black, data = ny_data)
get_regression_table(model_death_counties_black_ny)
## Warning in qt(a, object$df.residual): NaNs produced
get_regression_summaries(model_death_counties_black_ny)

Basic Regression Mortality rate and Race (White) Although insignificant at the national level, the variables are significant for New York counties. There is a strong relationship (-0.835) between covid mortality rate and the proportion of white folks in NY counties.

model_death_white_ny <- lm(covid_mortality_rate ~ RACE_White, data = ny_data)
get_regression_table(model_death_white_ny)
get_regression_summaries(model_death_white_ny)
cor(ny_data$RACE_White, ny_data$covid_mortality_rate)
## [1] -0.8355895
ggplot(ny_data, mapping = aes(y = RACE_White, x = covid_mortality_rate ), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Mortality rate and Race (Black) Although insignificant at the national level, the variables are significant for New York counties (0.035 < 0.05. There is a strong relationship (0.70) between covid mortality rate and the proportion of black folks in NY counties.

model_death_black_ny <- lm(covid_mortality_rate ~ RACE_Black, data = ny_data)
get_regression_table(model_death_black_ny)
get_regression_summaries(model_death_black_ny)
cor(ny_data$RACE_Black, ny_data$covid_mortality_rate)
## [1] 0.701811
ggplot(ny_data, mapping = aes(y = RACE_Black, x = covid_mortality_rate)) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Mortality rate and Race (American Indian and Alaska Natives) Insignificant within NY counties

model_death_aian_ny <- lm(covid_mortality_rate ~ `RACE_American_Indian_ Alaska_Native`, data = ny_data)
get_regression_table(model_death_aian_ny)
get_regression_summaries(model_death_aian_ny)
cor(ny_data$`RACE_American_Indian_ Alaska_Native`, ny_data$covid_mortality_rate)
## [1] 0.3508525
ggplot(ny_data, mapping = aes(y = `RACE_American_Indian_ Alaska_Native`, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Mortality rate and Race (Asian) Insignificant within NY counties

model_death_asian_ny <- lm(covid_mortality_rate ~ RACE_Asian, data = ny_data)
get_regression_table(model_death_asian_ny)
get_regression_summaries(model_death_asian_ny)
cor(ny_data$RACE_Asian, ny_data$covid_mortality_rate)
## [1] 0.5962005
ggplot(ny_data, mapping = aes(y = RACE_Asian, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Mortality rate and Race (Native Hawaiian and Pacific Islander) Insignificant within NY counties

model_death_nhopi_ny <- lm(covid_mortality_rate ~ RACE_Native_Hawaiian_Other_Pacific_Islander, data = ny_data)
get_regression_table(model_death_nhopi_ny)
get_regression_summaries(model_death_nhopi_ny)
cor(ny_data$RACE_Native_Hawaiian_Other_Pacific_Islander, ny_data$covid_mortality_rate)
## [1] 0.5456615
ggplot(ny_data,mapping = aes(y = RACE_Native_Hawaiian_Other_Pacific_Islander, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Mortality rate and Race (Other) Insignificant within NY counties

model_death_other_ny <- lm(covid_mortality_rate ~ `RACE_ Other`, data = ny_data)
get_regression_table(model_death_other_ny)
get_regression_summaries(model_death_other_ny)
cor(ny_data$`RACE_ Other`, ny_data$covid_mortality_rate)
## [1] 0.5518382
ggplot(ny_data, mapping = aes(y = `RACE_ Other`, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Mortality rate and Race (Other)

model_death_hispaniclatino_ny <- lm(covid_mortality_rate ~ RACE_Hispanic_Latino, data = ny_data)
get_regression_table(model_death_hispaniclatino_ny)
get_regression_summaries(model_death_hispaniclatino_ny)
cor(ny_data$RACE_Hispanic_Latino, ny_data$covid_mortality_rate)
## [1] 0.5201505
ggplot(ny_data, mapping = aes(y = RACE_Hispanic_Latino, x = covid_mortality_rate)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

##Language I only analyzed Language other than English variable. Additional Language variables were analyzed by Gowri in Python

#Language by State

ggplot(death_rate, aes(x=REGION, y=`LANGUAGE_Other_Than_ English`)) + geom_col() + theme(axis.text.x = element_text(angle=60, hjust=1))

Basic Regression Nationwide Language (Other than English) and Covid Mortality Rate Language and covid mortality rate are insignificant at the national level.

model_death_language <- lm(covid_mortality_rate ~ `LANGUAGE_Other_Than_ English`, data = death_rate)
get_regression_table(model_death_language)
get_regression_summaries(model_death_language)
cor(death_rate$`LANGUAGE_Other_Than_ English`, death_rate$covid_mortality_rate)
## [1] 0.1234168
ggplot(death_rate, mapping = aes(y = `LANGUAGE_Other_Than_ English`, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

New York Languages

ny_demographics_language <- ny_data %>% select(COUNTY, LANGUAGE_Population_5_Years_and_Over, LANGUAGE_English_Only, `LANGUAGE_Other_Than_ English`, LANGUAGE_Limited_English)
glimpse(ny_demographics_language)
## Rows: 9
## Columns: 5
## $ COUNTY                               <chr> "Queens", "Westchester", "Bron...
## $ LANGUAGE_Population_5_Years_and_Over <dbl> 2137881, 913875, 1329368, 8695...
## $ LANGUAGE_English_Only                <dbl> 0.436, 0.654, 0.410, 0.889, 0....
## $ `LANGUAGE_Other_Than_ English`       <dbl> 0.564, 0.346, 0.590, 0.111, 0....
## $ LANGUAGE_Limited_English             <dbl> 0.294, 0.124, 0.263, 0.044, 0....
write.csv(ny_demographics_language,"C:\\Users\\User\\Desktop\\ny_demographics_language.csv", row.names = FALSE)

NY Language (Other than English) by County

ggplot(ny_demographics_language, aes(x=COUNTY, y=`LANGUAGE_Other_Than_ English`)) + geom_col()

NY Language (Other than English) and Covid Mortality Rate Regression There is a significant (0.01 < 0.05), strong relationship (0.80) between Language other than English Speakers and covid mortality rate within NY counties. As the proportion between language other than english speakers increases, so does covid mortality rate. 60% of variance between values is explained by the model.

model_death_language_ny <- lm(covid_mortality_rate ~ `LANGUAGE_Other_Than_ English`, data = ny_data)
get_regression_table(model_death_language_ny)
get_regression_summaries(model_death_language_ny)
cor(ny_data$`LANGUAGE_Other_Than_ English`, ny_data$covid_mortality_rate)
## [1] 0.7988953
ggplot(ny_data, mapping = aes(y = `LANGUAGE_Other_Than_ English`, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#Quick check on California - highest language other than english count Insignificant within California counties

ggplot(ca_data, aes(x=COUNTY, y=`LANGUAGE_Other_Than_ English`)) + geom_col() + theme(axis.text.x = element_text(angle=60, hjust=1))

model_death_language_ca <- lm(covid_mortality_rate ~ `LANGUAGE_Other_Than_ English`, data = ca_data)
get_regression_table(model_death_language_ca)
get_regression_summaries(model_death_language_ca)
cor(ca_data$`LANGUAGE_Other_Than_ English`, ca_data$covid_mortality_rate)
## [1] 0.3436648
ggplot(ca_data, mapping = aes(y = `LANGUAGE_Other_Than_ English`, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

##Income

#Income by county/state

Nationwide Income (With Social Security)

ggplot(death_rate, aes(x=REGION, y=INCOME_12_MONTHS_WITH_Social_Security)) + geom_col() + theme(axis.text.x = element_text(angle=60, hjust=1))

Basic Regression Nationwide Income (With Social Security) and Covid Mortality Rate Insignificant at the National level

model_death_income_socialsec <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_WITH_Social_Security , data = death_rate)
get_regression_table(model_death_income_socialsec)
get_regression_summaries(model_death_income_socialsec)
cor(death_rate$INCOME_12_MONTHS_WITH_Social_Security, death_rate$covid_mortality_rate)
## [1] 0.2526753
ggplot(death_rate, mapping = aes(y = INCOME_12_MONTHS_WITH_Social_Security, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Nationwide Income (Mean Social Security Dollars)

ggplot(death_rate, aes(x=REGION, y=INCOME_12_MONTHS_SS_Mean_Dollars)) + geom_col() + theme(axis.text.x = element_text(angle=60, hjust=1))

Basic Regression Nationwide Income (Mean Social Security Dollars) Insignificant at the National level

model_death_income_meansocialsec <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_SS_Mean_Dollars , data = death_rate)
get_regression_table(model_death_income_meansocialsec)
get_regression_summaries(model_death_income_meansocialsec)
cor(death_rate$INCOME_12_MONTHS_SS_Mean_Dollars, death_rate$covid_mortality_rate)
## [1] -0.04494376
ggplot(death_rate, mapping = aes(y = INCOME_12_MONTHS_SS_Mean_Dollars, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#NY Income

ny_demographics_income <- ny_data %>% select(COUNTY, INCOME_12_MONTHS_Population, INCOME_12_MONTHS_With_Earnings, INCOME_12_MONTHS_Mean_Dollars_Earned, INCOME_12_MONTHS_WITH_Social_Security, INCOME_12_MONTHS_SS_Mean_Dollars, INCOME_12_MONTHS_With_Suppl_Security_Income, INCOME_12_MONTHS_Mean_Suppl_Sec_Dollars, INCOME_12_MONTHS_With_Cash_Public_Assistance, INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars, INCOME_12_MONTHS_With_Retirement_Income, INCOME_12_MONTHS_Mean_Retirement_Income_Dollars, INCOME_12_MONTHS_With_Food_Stamps)
glimpse(ny_demographics_income)
## Rows: 9
## Columns: 13
## $ COUNTY                                               <chr> "Queens", "Wes...
## $ INCOME_12_MONTHS_Population                          <dbl> 788110, 352498...
## $ INCOME_12_MONTHS_With_Earnings                       <dbl> 0.809, 0.794, ...
## $ INCOME_12_MONTHS_Mean_Dollars_Earned                 <dbl> 93677, 155345,...
## $ INCOME_12_MONTHS_WITH_Social_Security                <dbl> 0.288, 0.328, ...
## $ INCOME_12_MONTHS_SS_Mean_Dollars                     <dbl> 18027, 21926, ...
## $ INCOME_12_MONTHS_With_Suppl_Security_Income          <dbl> 0.056, 0.049, ...
## $ INCOME_12_MONTHS_Mean_Suppl_Sec_Dollars              <dbl> 9592, 9638, 88...
## $ INCOME_12_MONTHS_With_Cash_Public_Assistance         <dbl> 0.028, 0.021, ...
## $ INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars <dbl> 3196, 3975, 32...
## $ INCOME_12_MONTHS_With_Retirement_Income              <dbl> 0.152, 0.194, ...
## $ INCOME_12_MONTHS_Mean_Retirement_Income_Dollars      <dbl> 26479, 40134, ...
## $ INCOME_12_MONTHS_With_Food_Stamps                    <dbl> 0.130, 0.091, ...
write.csv(ny_demographics_income,"C:\\Users\\User\\Desktop\\ny_demographics_income.csv", row.names = FALSE)

NY Income (Mean Dollars Earned) by County

ggplot(ny_demographics_income, aes(x=COUNTY, y=INCOME_12_MONTHS_Mean_Dollars_Earned)) + geom_col()

Basic Regression NY Income (Mean Dollars Earned) and Covid Mortality Rate Insignificant within NY counties

model_death_income_ny <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_Mean_Dollars_Earned, data = ny_data)
get_regression_table(model_death_income_ny)
get_regression_summaries(model_death_income_ny)
cor(ny_data$INCOME_12_MONTHS_SS_Mean_Dollars, ny_data$covid_mortality_rate)
## [1] -0.8447487
ggplot(ny_data, mapping = aes(y = INCOME_12_MONTHS_Mean_Dollars_Earned, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

NY Income (Mean Cash Public Assistance) by County

ggplot(ny_demographics_income, aes(x=COUNTY, y=INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars)) + geom_col()

Basic Regression NY Income (Mean Cash Public Assistance) and Covid Mortality Rate Insignificant within NY Counties

model_death_income_ca_ny <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars, data = ny_data)
get_regression_table(model_death_income_ca_ny)
get_regression_summaries(model_death_income_ca_ny)
cor(ny_data$INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars, ny_data$covid_mortality_rate)
## [1] -0.5234588
ggplot(ny_data, mapping = aes(y = INCOME_12_MONTHS_Mean_Cash_Public_Assistance_Dollars, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

NY Income (Mean Retirement Income) by County

ggplot(ny_demographics_income, aes(x=COUNTY, y=INCOME_12_MONTHS_Mean_Retirement_Income_Dollars)) + geom_col()

Basic Regression NY Income (Mean Retirement Income) and Covid Mortality Rate Insignificant within NY counties

model_death_income_retire_ny <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_Mean_Retirement_Income_Dollars, data = ny_data)
get_regression_table(model_death_income_retire_ny)
get_regression_summaries(model_death_income_retire_ny)
cor(ny_data$INCOME_12_MONTHS_Mean_Retirement_Income_Dollars, ny_data$covid_mortality_rate)
## [1] -0.5142754
ggplot(ny_data, mapping = aes(y = INCOME_12_MONTHS_Mean_Retirement_Income_Dollars, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

NY Income (With Food Stamps) by County

ggplot(ny_demographics_income, aes(x=COUNTY, y=INCOME_12_MONTHS_With_Food_Stamps)) + geom_col()

Basic Regression NY Income (With Food Stamps) and Covid Mortality Rate Insignificant within NY counties.

model_death_income_foodstamps_ny <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_With_Food_Stamps , data = ny_data)
get_regression_table(model_death_income_foodstamps_ny)
get_regression_summaries(model_death_income_foodstamps_ny)
cor(ny_data$INCOME_12_MONTHS_With_Food_Stamps, ny_data$covid_mortality_rate)
## [1] 0.5896981
ggplot(ny_data, mapping = aes(y = INCOME_12_MONTHS_With_Food_Stamps , x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

NY Income (With Social Security) by County

ggplot(ny_demographics_income, aes(x=COUNTY, y=INCOME_12_MONTHS_WITH_Social_Security)) + geom_col()

Basic Regression NY Income (With Social Security) and Covid Mortality Rate Income with Food Stamps and Covid mortality rate are significant (0 < 0.05) within NY counties. A strong relationship (-0.92) exists between income and covid mortality rate. As income with Social Security increases, covid mortality decreases at a rate of -0.589.

model_death_income_socialsec_ny <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_WITH_Social_Security , data = ny_data)
get_regression_table(model_death_income_socialsec_ny)
get_regression_summaries(model_death_income_socialsec_ny)
cor(ny_data$INCOME_12_MONTHS_WITH_Social_Security, ny_data$covid_mortality_rate)
## [1] -0.9190066
ggplot(ny_data, mapping = aes(y = INCOME_12_MONTHS_WITH_Social_Security, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

NY Income (Mean Social Security Dollars) by County

ggplot(ny_demographics_income, aes(x=COUNTY, y=INCOME_12_MONTHS_SS_Mean_Dollars)) + geom_col()

Basic Regression NY Income (Mean Social Security Dollars) and Covid Mortality Rate Income with Social Security and Covid mortality rate are significant (0.004 < 0.05) within NY counties. A strong relationship (-0.84) exists between income and covid mortality rate.

model_death_income_meansocialsec_ny <- lm(covid_mortality_rate ~ INCOME_12_MONTHS_SS_Mean_Dollars , data = ny_data)
get_regression_table(model_death_income_meansocialsec_ny)
get_regression_summaries(model_death_income_meansocialsec_ny)
cor(ny_data$INCOME_12_MONTHS_SS_Mean_Dollars, ny_data$covid_mortality_rate)
## [1] -0.8447487
ggplot(ny_data, mapping = aes(y = INCOME_12_MONTHS_SS_Mean_Dollars, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

##Poverty

#Poverty Population by State

ggplot(death_rate, aes(x=REGION, y = POVERTY_STATUS_Population)) + geom_col() + theme(axis.text.x = element_text(angle=60, hjust=1))

Basic Regression Nationwide Poverty (Below 100%) and Covid Mortality Rate Insignificant at the National Level

model_death_poverty_below100 <- lm(covid_mortality_rate ~ POVERTY_STATUS_Below_100_Percent_Level, data = death_rate)
get_regression_table(model_death_poverty_below100)
get_regression_summaries(model_death_poverty_below100)
cor(death_rate$POVERTY_STATUS_Below_100_Percent_Level, death_rate$covid_mortality_rate)
## [1] 0.002756992
ggplot(death_rate, mapping = aes(y = POVERTY_STATUS_Below_100_Percent_Level, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Nationwide Poverty (100% to 149%) and Covid Mortality Rate Insignificant at the National level

model_death_poverty_100to149 <- lm(covid_mortality_rate ~ POVERTY_STATUS_100_to_149_Percent_Level, data = death_rate)
get_regression_table(model_death_poverty_100to149)
get_regression_summaries(model_death_poverty_100to149)
cor(death_rate$POVERTY_STATUS_100_to_149_Percent_Level, death_rate$covid_mortality_rate)
## [1] -0.1549165
ggplot(death_rate, mapping = aes(y = POVERTY_STATUS_100_to_149_Percent_Level, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression Nationwide Poverty (Above 150%) and Covid Mortality Rate Insignificant at the National level

model_death_poverty_above150 <- lm(covid_mortality_rate ~ POVERTY_STATUS_Above_150_Percent_Level, data = death_rate)
get_regression_table(model_death_poverty_above150)
get_regression_summaries(model_death_poverty_above150)
cor(death_rate$POVERTY_STATUS_Above_150_Percent_Level, death_rate$covid_mortality_rate)
## [1] 0.05141138
ggplot(death_rate, mapping = aes(y = POVERTY_STATUS_Above_150_Percent_Level, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#NY Poverty

ny_demographics_poverty <- ny_data %>% select(COUNTY, POVERTY_STATUS_Population, POVERTY_STATUS_Below_100_Percent_Level, POVERTY_STATUS_100_to_149_Percent_Level, POVERTY_STATUS_Above_150_Percent_Level)
write.csv(ny_demographics_poverty,"C:\\Users\\User\\Desktop\\ny_demographics_poverty.csv", row.names = FALSE)

NY Poverty Population by County

ggplot(ny_demographics_poverty, aes(x=COUNTY, y = POVERTY_STATUS_Population)) + geom_col()

Basic Regression NY Poverty (Below 100%) and Covid Mortality Rate Insignificant within NY counties

model_death_poverty_below100_ny <- lm(covid_mortality_rate ~ POVERTY_STATUS_Below_100_Percent_Level, data = ny_data)
get_regression_table(model_death_poverty_below100_ny)
get_regression_summaries(model_death_poverty_below100_ny)
cor(ny_data$POVERTY_STATUS_Below_100_Percent_Level, ny_data$covid_mortality_rate)
## [1] 0.6418885
ggplot(ny_data, mapping = aes(y = POVERTY_STATUS_Below_100_Percent_Level, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression NY Poverty (100% to 145%) and Covid Mortality Rate Insignificant within NY counties

model_death_poverty_100to149_ny <- lm(covid_mortality_rate ~ POVERTY_STATUS_100_to_149_Percent_Level, data = ny_data)
get_regression_table(model_death_poverty_100to149_ny)
get_regression_summaries(model_death_poverty_100to149_ny)
cor(ny_data$POVERTY_STATUS_100_to_149_Percent_Level, ny_data$covid_mortality_rate)
## [1] 0.6309539
ggplot(ny_data, mapping = aes(y = POVERTY_STATUS_100_to_149_Percent_Level, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Basic Regression NY Poverty (Above 150%) and Covid Mortality Rate Insignificant within NY counties

model_death_poverty_above150_ny <- lm(covid_mortality_rate ~ POVERTY_STATUS_Above_150_Percent_Level, data = ny_data)
get_regression_table(model_death_poverty_above150_ny)
get_regression_summaries(model_death_poverty_above150_ny)
cor(ny_data$POVERTY_STATUS_Above_150_Percent_Level, ny_data$covid_mortality_rate)
## [1] -0.6511937
ggplot(ny_data, mapping = aes(y = POVERTY_STATUS_Above_150_Percent_Level, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Quick check on California - highest poverty population Insignificant within California counties

model_death_poverty_below100_ca <- lm(covid_mortality_rate ~ POVERTY_STATUS_Below_100_Percent_Level, data = ca_data)
get_regression_table(model_death_poverty_below100_ca)
get_regression_summaries(model_death_poverty_below100_ca)
cor(ny_data$POVERTY_STATUS_Below_100_Percent_Level, ny_data$covid_mortality_rate)
## [1] 0.6418885
ggplot(ca_data, mapping = aes(y = POVERTY_STATUS_Below_100_Percent_Level, x = covid_mortality_rate), color = COUNTY) +
  geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

##Age

ny_demographics_age <- age_proportions %>% filter(REGION == "New York") %>%
                                          select(COUNTY, MedianAge2010, 
                                          proportion_fmless_5,
                                          proportion_mless_5, 
                                          proportion_fm5to9,
                                          proportion_m5to9,
                                          proportion_m10to14,
                                          proportion_fm10to14,
                                          proportion_m15to19,
                                          proportion_fm15to19,
                                          proportion_m20to24,
                                          proportion_fm20to24,
                                          proportion_m25to29,
                                          proportion_fm25to29,
                                          proportion_m30to34,
                                          proportion_fm30to34,
                                          proportion_m35to44,
                                          proportion_fm35to44,
                                          proportion_m45to54,
                                          proportion_fm45to54,
                                          proportion_m55to59,
                                          proportion_fm55to59,
                                          proportion_m60to64,
                                          proportion_fm60to64,
                                          proportion_m65to74,
                                          proportion_fm65to74,
                                          proportion_m75to84,
                                          proportion_fm75to84,
                                          proportion_mover84,
                                          proportion_fmover84)
write.csv(ny_demographics_age,"C:\\Users\\User\\Desktop\\ny_demographics_age.csv", row.names = FALSE)

#Age by State Under5 Insignificant at the National level

model_death_age_under5 <- lm(covid_mortality_rate ~ proportion_fmless_5 + proportion_mless_5, data = death_rate)
get_regression_table(model_death_age_under5)
get_regression_summaries(model_death_age_under5)
ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fmless_5, x = covid_mortality_rate),color = "red") +
  geom_point(mapping = aes(y=proportion_mless_5, x = covid_mortality_rate), color = "blue")

Multivariate Regression 5 to 9 male and female interaction Insignificant at the National level

model_death_age_5to9 <- lm(covid_mortality_rate ~ proportion_fm5to9 * proportion_m5to9, data = death_rate)
get_regression_table(model_death_age_5to9)
get_regression_summaries(model_death_age_5to9)
ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fm5to9, x = covid_mortality_rate),color = "red") +
  geom_point(mapping = aes(y=proportion_m5to9, x = covid_mortality_rate), color = "blue")

Multivariate Regression 10 to 14 male and female interaction Insignificant at the national level

model_death_age_10to14 <- lm(covid_mortality_rate ~ proportion_fm10to14 * proportion_m10to14, data = death_rate)
get_regression_table(model_death_age_10to14)
get_regression_summaries(model_death_age_10to14)
ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fm10to14, x = covid_mortality_rate),color = "red") +
  geom_point(mapping = aes(y=proportion_m10to14, x = covid_mortality_rate), color = "blue")

Multivariate Regression 15 to 19 male and female interaction Insignificant at the national level

model_death_age_15to19 <- lm(covid_mortality_rate ~ proportion_fm15to19 * proportion_m15to19, data = death_rate)
get_regression_table(model_death_age_15to19)
get_regression_summaries(model_death_age_15to19)
ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fm15to19, x = covid_mortality_rate),color = "red") +
  geom_point(mapping = aes(y=proportion_m15to19, x = covid_mortality_rate), color = "blue")

Multivariate Regression 20 to 24 male and female interaction the proportion of males 20-24 is the only significant variable. There is a weak negative relationship between males in this age group and covid mortality rate. as mortality rate increases, the proportion of males in this age group decreases. Only about 5% of the variance is explained by the model, so this is not a great model for predicting covid mortality rate

model_death_age_20to24 <- lm(covid_mortality_rate ~ proportion_fm20to24 * proportion_m20to24, data = death_rate)
get_regression_table(model_death_age_20to24)
get_regression_summaries(model_death_age_20to24)
cor(death_rate$proportion_m20to24, death_rate$covid_mortality_rate)
## [1] -0.1877392
ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fm20to24, x = covid_mortality_rate),color = "red") +
  geom_point(mapping = aes(y=proportion_m20to24, x = covid_mortality_rate), color = "blue")

Multivariate Regression 25 to 29 male and female interaction female data in this age range is missing

the proportion of males 25-29 is significant variable and mortality rate. There is a weak negative relationship (-0.25) between males in this age group and covid
mortality rate. As mortality rate increases, the proportion of males in this age group decreases. Only about 17% of the variance is explained by the model, so this is not a great model for predicting covid mortality rate.

model_death_age_25to29 <- lm(covid_mortality_rate ~ proportion_fm25to29 * proportion_m25to29, data = death_rate)
get_regression_table(model_death_age_25to29)
get_regression_summaries(model_death_age_25to29)
cor(death_rate$proportion_m25to29, death_rate$covid_mortality_rate)
## [1] -0.2582111
ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fm25to29, x = covid_mortality_rate),color = "red") +
  geom_point(mapping = aes(y=proportion_m25to29, x = covid_mortality_rate), color = "blue")

Multivariate Regression 30 to 34 male and female In this plot, its possible there might be a non-linear relationship between the variables. Considering this fact witht the previous models, it would be beneficial to remodel these variables with a non-linear regression. I attempted to do so for this age group, however I am not familiar with creating nonlinear regression models in R and was unsuccessful in analyzing it.

model_death_age_fm30to34 <- lm(covid_mortality_rate ~ proportion_fm30to34, data = death_rate)
model_death_age_m30to34 <-  lm(covid_mortality_rate ~ proportion_m30to34, data = death_rate)
get_regression_table(model_death_age_fm30to34)
get_regression_summaries(model_death_age_fm30to34)
get_regression_table(model_death_age_m30to34)
get_regression_summaries(model_death_age_m30to34)
ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fm30to34, x = covid_mortality_rate),color = "red") +
  geom_point(mapping = aes(y=proportion_m30to34, x = covid_mortality_rate), color = "blue")

Transformed the data and tried a nonlinear model.

death_rate$proportion_fm30to34_squared <- death_rate$proportion_fm30to34^2
death_rate$proportion_m30to34_squared <- death_rate$proportion_m30to34^2

model_death_age_fm30to34_quadratic <- lm(covid_mortality_rate ~ poly(proportion_fm30to34, 2, raw = TRUE), data = death_rate)
model_death_age_m30to34_quadratic <- lm(covid_mortality_rate ~ poly(proportion_m30to34, 2, raw = TRUE), data = death_rate)

get_regression_table(model_death_age_fm30to34_quadratic)
#get_regression_summaries(model_death_age_fm30to34_quadratic)

get_regression_table(model_death_age_m30to34_quadratic)
#get_regression_summaries(model_death_age_m30to34_quadratic)

ggplot(death_rate) + geom_point(mapping = aes(y = proportion_fm30to34, x = covid_mortality_rate),color = "red") #+ stat_smooth(method = lm, formula = y ~ poly(x, 2, raw = TRUE))

ggplot(death_rate) + geom_point(mapping = aes(y=proportion_m30to34, x = covid_mortality_rate), color = "blue") #+ stat_smooth(method = lm, formula = y ~ poly(x, 2, raw = TRUE))

####ICU beds, mortality, and over 60

Selected columns in Question

death_beds_60 <- death_rate_over_60 %>% select(REGION, COUNTY, `#Hospitals`, `#ICU_beds`, proportion_fmover60, proportion_mover60, tot_cases, tot_deaths, covid_mortality_rate) 

write.csv(death_beds_60, "C:\\Users\\User\\Desktop\\death_beds_60.csv", row.names = FALSE)

Basic Regression Lines for males and females over 60 and total cases

death_beds_60
ggplot(death_beds_60, aes(x=proportion_fmover60, y=tot_cases)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

ggplot(death_beds_60, aes(x=proportion_mover60, y=tot_cases)) + geom_point() + geom_smooth(method = 'lm', se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Proportion of male and females over 60 and ICU Beds

ggplot() + geom_point(death_beds_60, mapping = aes(x=proportion_fmover60, y = `#ICU_beds`), color = "red") + geom_point(death_beds_60, mapping = aes(x=proportion_mover60, y = `#ICU_beds`), color = "blue")

Multivariate Regression Females Over 60 and ICU Beds and Total Cases as interactions Insignificant at the National level

death_beds_fm60_model <- lm(proportion_fmover60 ~ `#ICU_beds`*tot_cases, data = death_beds_60)
get_regression_table(death_beds_fm60_model)
get_regression_summaries(death_beds_fm60_model)
get_correlation(death_beds_60, proportion_fmover60 ~ `#ICU_beds`)
get_correlation(death_beds_60, proportion_fmover60 ~ tot_cases)

Multivariate Regression Males Over 60 and ICU Beds and Total Cases as interactions Insignificant at the National level

death_beds_m60_model <- lm(proportion_mover60 ~ `#ICU_beds`*tot_cases, data = death_beds_60)
get_regression_table(death_beds_m60_model)
get_regression_summaries(death_beds_m60_model)
get_correlation(death_beds_60, proportion_mover60 ~ `#ICU_beds`)
get_correlation(death_beds_60, proportion_mover60 ~ tot_cases)

Attempted matrixed scatter plots - unfamiliar so relied on R documentation, but unsuccessful ggpairs(death_beds_60, mapping = aes(death_beds_60$proportion_fmover60, death_beds_60$proportion_mover60)) too high of threshhold

####Indicators of Reduced Access to Care

reduced_medcare_indicators <- read_excel("C:/Users/User/Downloads/access_deny.xlsx")
## New names:
## * `` -> ...1
View(reduced_medcare_indicators)

National Estimates of Reduced Medical Care Indicators over 14 weeks (April 23, 20 - September 28, 20)

national_estimate <- reduced_medcare_indicators %>% filter(Group == "National Estimate")
national_estimate
ggplot(national_estimate, aes(x=Week, y=Value)) + geom_point() + facet_wrap(~ Indicator) #theme(axis.text.x = element_text(angle=60, hjust=1))

Age Reduced Medical Care Indicators Over 14 Weeks (April 23, 20 - September 28, 20)

age <- reduced_medcare_indicators %>% filter(Group == "By Age")
age
ggplot(age, aes(x=Week, y=Value, color = Subgroup)) + geom_point() + facet_wrap(~ Indicator) 

Gender Reduced Medical Care Indicators Over 14 Weeks (April 23, 20 - September 28, 20)

gender <- reduced_medcare_indicators %>% filter(Group == "By Gender")
gender
ggplot(gender, aes(x=Week, y=Value, color = Subgroup)) + geom_point() + facet_wrap(~ Indicator)

Race Reduced Medical Care Indicators Over 14 Weeks (April 23, 20 - September 28, 20)

race <- reduced_medcare_indicators %>% filter(Group == "By Race/Hispanic ethnicity") %>% group_by(Subgroup)
race
ggplot(race, aes(x=Week, y=Value, color = Subgroup)) + geom_point() + facet_wrap(~ Indicator)

Education Reduced Medical Care Indicators Over 14 Weeks (April 23, 20 - September 28, 20)

education <- reduced_medcare_indicators %>% filter(Group == "By Education")
education
ggplot(education, aes(x=Week, y=Value, color = Subgroup)) + geom_point() + facet_wrap(~ Indicator)

State Reduced Medical Care Indicators Over 14 Weeks (April 23, 20 - September 28, 20)

state <- reduced_medcare_indicators %>% filter(Group == "By State")
state
ggplot(state, aes(x=Week, y=Value)) + geom_point(alpha = 0.2) + facet_wrap(~ Indicator)

Sorted data set by highest value indicator - Delayed or Did not Get Care, Last 4 weeks

delayed_and_didnot_get_care <- reduced_medcare_indicators %>% filter(Indicator == "Delayed or Did Not Get Care, Last 4 Weeks")
delayed_and_didnot_get_care
write.csv(delayed_and_didnot_get_care,"C:\\Users\\User\\Desktop\\delayed_and_didnot_get_care.csv", row.names = FALSE)

This data was given to Trey to analyze and create additional visualizations through Python